This week's Tidy Tuesday data comes from the US Census. It is organised by US county and includes a number of continuous variables, such as unemployment rate, poverty rate and population. This week I might try a scatterplot or two.
# Load the county-level census extract. Cells containing "M" are read as NA,
# and text columns are kept as character vectors rather than factors.
census_data <- read.csv(
  file = "us_census.csv",
  header = TRUE,
  sep = ",",
  na.strings = "M",
  stringsAsFactors = FALSE
)

# Print the first rows to sanity-check the import.
head(census_data)
## CensusId State County TotalPop Men Women Hispanic White Black
## 1 1001 Alabama Autauga 55221 26745 28476 2.6 75.8 18.5
## 2 1003 Alabama Baldwin 195121 95314 99807 4.5 83.1 9.5
## 3 1005 Alabama Barbour 26932 14497 12435 4.6 46.2 46.7
## 4 1007 Alabama Bibb 22604 12073 10531 2.2 74.5 21.4
## 5 1009 Alabama Blount 57710 28512 29198 8.6 87.9 1.5
## 6 1011 Alabama Bullock 10678 5660 5018 4.4 22.2 70.7
## Native Asian Pacific Citizen Income IncomeErr IncomePerCap
## 1 0.4 1.0 0 40725 51281 2391 24974
## 2 0.6 0.7 0 147695 50254 1263 27317
## 3 0.2 0.4 0 20714 32964 2973 16824
## 4 0.4 0.1 0 17495 38678 3995 18431
## 5 0.3 0.1 0 42345 45813 3141 20532
## 6 1.2 0.2 0 8057 31938 5884 17580
## IncomePerCapErr Poverty ChildPoverty Professional Service Office
## 1 1080 12.9 18.6 33.2 17.0 24.2
## 2 711 13.4 19.2 33.1 17.7 27.1
## 3 798 26.7 45.3 26.8 16.1 23.1
## 4 1618 16.8 27.9 21.5 17.9 17.8
## 5 708 16.7 27.2 28.5 14.1 23.9
## 6 2055 24.6 38.4 18.8 15.0 19.7
## Construction Production Drive Carpool Transit Walk OtherTransp
## 1 8.6 17.1 87.5 8.8 0.1 0.5 1.3
## 2 10.8 11.2 84.7 8.8 0.1 1.0 1.4
## 3 10.8 23.1 83.8 10.9 0.4 1.8 1.5
## 4 19.0 23.7 83.2 13.5 0.5 0.6 1.5
## 5 13.5 19.9 84.9 11.2 0.4 0.9 0.4
## 6 20.1 26.4 74.9 14.9 0.7 5.0 1.7
## WorkAtHome MeanCommute Employed PrivateWork PublicWork SelfEmployed
## 1 1.8 26.5 23986 73.6 20.9 5.5
## 2 3.9 26.4 85953 81.5 12.3 5.8
## 3 1.6 24.1 8597 71.8 20.8 7.3
## 4 0.7 28.8 8294 76.8 16.1 6.7
## 5 2.3 34.9 22189 82.0 13.5 4.2
## 6 2.8 27.5 3865 79.5 15.1 5.4
## FamilyWork Unemployment
## 1 0.0 7.6
## 2 0.4 7.5
## 3 0.1 17.6
## 4 0.4 8.3
## 5 0.4 7.7
## 6 0.0 18.0
# Print the last six rows (the default row count, written out explicitly).
tail(census_data, n = 6L)
## CensusId State County TotalPop Men Women Hispanic White
## 3215 72143 Puerto Rico Vega Alta 39319 18762 20557 98.6 1.1
## 3216 72145 Puerto Rico Vega Baja 56858 27379 29479 96.4 3.4
## 3217 72147 Puerto Rico Vieques 9130 4585 4545 96.7 2.9
## 3218 72149 Puerto Rico Villalba 24685 12086 12599 99.7 0.0
## 3219 72151 Puerto Rico Yabucoa 36279 17648 18631 99.8 0.2
## 3220 72153 Puerto Rico Yauco 39474 19047 20427 99.5 0.5
## Black Native Asian Pacific Citizen Income IncomeErr IncomePerCap
## 3215 0.0 0 0.0 0 29570 18016 1334 9918
## 3216 0.1 0 0.0 0 43656 16948 1234 9102
## 3217 0.0 0 0.0 0 7085 18104 3771 8821
## 3218 0.0 0 0.0 0 18458 17818 1255 8420
## 3219 0.0 0 0.1 0 27924 15627 1836 7960
## 3220 0.0 0 0.0 0 30661 14708 1245 7743
## IncomePerCapErr Poverty ChildPoverty Professional Service Office
## 3215 1103 50.3 63.8 29.5 19.5 29.2
## 3216 538 48.5 56.1 30.1 21.6 24.7
## 3217 939 39.1 58.1 18.4 38.2 18.5
## 3218 486 54.0 68.3 23.6 21.0 21.8
## 3219 512 52.3 62.1 25.9 25.9 20.7
## 3220 474 51.7 56.3 28.1 20.5 26.7
## Construction Production Drive Carpool Transit Walk OtherTransp
## 3215 9.2 12.7 83.4 7.7 2.6 3.1 2.3
## 3216 9.2 14.3 89.1 7.1 0.9 1.2 1.3
## 3217 15.7 9.1 69.1 17.8 1.0 10.8 0.0
## 3218 13.9 19.7 82.0 11.4 0.1 3.2 0.0
## 3219 12.8 14.7 86.0 7.9 0.0 2.3 2.3
## 3220 14.2 10.5 84.8 8.5 1.2 1.6 0.7
## WorkAtHome MeanCommute Employed PrivateWork PublicWork SelfEmployed
## 3215 0.9 35.0 9804 75.7 20.3 4.1
## 3216 0.3 32.0 13660 78.3 17.6 4.1
## 3217 1.4 14.0 2860 44.5 41.6 13.6
## 3218 3.3 26.9 6795 59.2 27.5 13.1
## 3219 1.5 29.5 8083 65.1 27.6 7.3
## 3220 3.1 24.6 8923 68.0 27.6 4.4
## FamilyWork Unemployment
## 3215 0.0 21.7
## 3216 0.0 15.2
## 3217 0.3 12.2
## 3218 0.2 25.9
## 3219 0.0 24.3
## 3220 0.0 27.1
View a summary of the US census data variables (output of `summary(census_data)`):
## CensusId State County TotalPop
## Min. : 1001 Length:3220 Length:3220 Min. : 85
## 1st Qu.:19033 Class :character Class :character 1st Qu.: 11218
## Median :30024 Mode :character Mode :character Median : 26035
## Mean :31394 Mean : 99409
## 3rd Qu.:46106 3rd Qu.: 66430
## Max. :72153 Max. :10038388
##
## Men Women Hispanic White
## Min. : 42 Min. : 43 Min. : 0.000 Min. : 0.00
## 1st Qu.: 5637 1st Qu.: 5572 1st Qu.: 1.900 1st Qu.:64.10
## Median : 12932 Median : 13057 Median : 3.900 Median :84.10
## Mean : 48897 Mean : 50512 Mean :11.012 Mean :75.43
## 3rd Qu.: 32993 3rd Qu.: 33488 3rd Qu.: 9.825 3rd Qu.:93.20
## Max. :4945351 Max. :5093037 Max. :99.900 Max. :99.80
##
## Black Native Asian Pacific
## Min. : 0.000 Min. : 0.000 Min. : 0.000 Min. : 0.00000
## 1st Qu.: 0.500 1st Qu.: 0.100 1st Qu.: 0.200 1st Qu.: 0.00000
## Median : 1.900 Median : 0.300 Median : 0.500 Median : 0.00000
## Mean : 8.665 Mean : 1.724 Mean : 1.229 Mean : 0.08273
## 3rd Qu.: 9.600 3rd Qu.: 0.600 3rd Qu.: 1.200 3rd Qu.: 0.00000
## Max. :85.900 Max. :92.100 Max. :41.600 Max. :35.30000
##
## Citizen Income IncomeErr IncomePerCap
## Min. : 80 Min. : 10499 Min. : 270 Min. : 5878
## 1st Qu.: 8450 1st Qu.: 38192 1st Qu.: 1635 1st Qu.:20239
## Median : 19643 Median : 44749 Median : 2406 Median :23460
## Mean : 69935 Mean : 46130 Mean : 2850 Mean :23982
## 3rd Qu.: 49920 3rd Qu.: 52074 3rd Qu.: 3446 3rd Qu.:27053
## Max. :6046749 Max. :123453 Max. :21355 Max. :65600
## NA's :1 NA's :1
## IncomePerCapErr Poverty ChildPoverty Professional
## Min. : 113 Min. : 1.40 Min. : 0.00 Min. :13.50
## 1st Qu.: 755 1st Qu.:12.10 1st Qu.:16.30 1st Qu.:26.70
## Median : 1096 Median :16.15 Median :22.70 Median :29.90
## Mean : 1363 Mean :17.49 Mean :24.18 Mean :30.99
## 3rd Qu.: 1631 3rd Qu.:20.70 3rd Qu.:30.00 3rd Qu.:34.40
## Max. :15266 Max. :64.20 Max. :81.60 Max. :74.00
## NA's :1
## Service Office Construction Production
## Min. : 5.00 Min. : 4.10 Min. : 1.70 Min. : 0.00
## 1st Qu.:16.00 1st Qu.:20.20 1st Qu.: 9.80 1st Qu.:11.50
## Median :18.10 Median :22.40 Median :12.10 Median :15.25
## Mean :18.35 Mean :22.22 Mean :12.71 Mean :15.73
## 3rd Qu.:20.30 3rd Qu.:24.40 3rd Qu.:14.90 3rd Qu.:19.32
## Max. :38.20 Max. :35.40 Max. :40.30 Max. :55.60
##
## Drive Carpool Transit Walk
## Min. : 5.20 Min. : 0.00 Min. : 0.0000 Min. : 0.000
## 1st Qu.:76.60 1st Qu.: 8.40 1st Qu.: 0.1000 1st Qu.: 1.400
## Median :80.70 Median : 9.90 Median : 0.4000 Median : 2.400
## Mean :79.18 Mean :10.28 Mean : 0.9718 Mean : 3.324
## 3rd Qu.:83.70 3rd Qu.:11.80 3rd Qu.: 0.8000 3rd Qu.: 4.000
## Max. :94.60 Max. :29.90 Max. :61.7000 Max. :71.200
##
## OtherTransp WorkAtHome MeanCommute Employed
## Min. : 0.000 Min. : 0.000 Min. : 4.90 Min. : 62
## 1st Qu.: 0.900 1st Qu.: 2.700 1st Qu.:19.50 1st Qu.: 4551
## Median : 1.300 Median : 3.900 Median :23.00 Median : 10508
## Mean : 1.613 Mean : 4.632 Mean :23.28 Mean : 45594
## 3rd Qu.: 1.900 3rd Qu.: 5.600 3rd Qu.:26.80 3rd Qu.: 28633
## Max. :39.100 Max. :37.200 Max. :44.00 Max. :4635465
##
## PrivateWork PublicWork SelfEmployed FamilyWork
## Min. :25.00 Min. : 5.80 Min. : 0.000 Min. :0.0000
## 1st Qu.:70.50 1st Qu.:13.10 1st Qu.: 5.400 1st Qu.:0.1000
## Median :75.70 Median :16.20 Median : 6.900 Median :0.2000
## Mean :74.22 Mean :17.56 Mean : 7.932 Mean :0.2881
## 3rd Qu.:79.70 3rd Qu.:20.50 3rd Qu.: 9.400 3rd Qu.:0.3000
## Max. :88.30 Max. :66.20 Max. :36.600 Max. :9.8000
##
## Unemployment
## Min. : 0.000
## 1st Qu.: 5.500
## Median : 7.600
## Mean : 8.094
## 3rd Qu.: 9.900
## Max. :36.500
##
# Add a CountyState column of the form "County, State". remove = FALSE keeps
# the original County and State columns alongside the new one.
census_data2 <- unite(
  data = census_data,
  col = "CountyState",
  County, State,
  sep = ", ",
  remove = FALSE
)
# Scatterplot of income per capita against poverty rate, coloured by
# unemployment rate on a yellow -> orange -> dark red gradient.
# CountyState is mapped to `label` so it can be shown in the ggplotly()
# tooltip below.
sp <- ggplot(
  data = census_data2,
  mapping = aes(
    x = IncomePerCap,
    y = Poverty,
    color = Unemployment,
    label = CountyState
  )
) +
  geom_point() +
  theme_minimal() +
  scale_colour_gradientn(colours = c("yellow", "orange", "darkred")) +
  labs(
    x = "Income per capita ($US)",
    y = "Poverty (%)",
    title = "Income vs poverty, US counties",
    subtitle = "County-level American Community Survey (5-year estimates) 2015",
    caption = "Data source: census.gov"
  )

# Static version of the plot.
sp

# Interactive version; the tooltip lists the county label, both axis values
# and the colour (unemployment) value.
ggplotly(sp, tooltip = c("label", "x", "y", "color"))
Most of the counties that have low income per capita and high poverty and unemployment rates are in Puerto Rico.
# Ridgeline plot: one density curve of income per capita for each state,
# drawn with a white outline and a light-blue fill.
ggplot(data = census_data2, mapping = aes(x = IncomePerCap, y = State)) +
  geom_density_ridges(color = "#FFFFFF", fill = "#56B4E9") +
  labs(
    x = "Income per capita ($US)",
    title = "Income per capita distribution, USA, 2015",
    caption = "Data source: census.gov"
  ) +
  theme_minimal()
# Boxplot of income per capita by state, with states ordered by their median
# income per capita. coord_flip() puts the states on the vertical axis so the
# state names stay readable.
#
# The summary function must be passed to fct_reorder() as `.fun`. The
# previous `fun = median` did not match that parameter; it was forwarded
# through `...` instead, so the ordering was only correct because median is
# fct_reorder()'s default summary function.
ggplot(
  data = census_data2,
  mapping = aes(
    x = fct_reorder(State, IncomePerCap, .fun = median),
    y = IncomePerCap
  )
) +
  geom_boxplot(fill = "#56B4E9") +
  theme_minimal() +
  labs(
    x = "State",
    y = "Income per capita ($US)",
    title = "Income per capita distribution, USA, 2015",
    caption = "Data source: census.gov"
  ) +
  coord_flip()